2 Available Data

3 Clean up data

  • transform dates
  • transform units
  • remove NA records

5 Seasonal, weekly, and hourly patterns of NO2 etc. in Germany.

  • data from 1970 -> 2020
  • only Kiel
  • summary=1 -> hourly data
  • filtered for top 10 air pollutants over the complete timeframe
  • timeframes: hourly/weekdays/monthly
  • avg air pollution over timeframes
  • normalized data to be able to compare the different air pollutants
# Three stacked heatmaps of the mean pollutant value in `df`, aggregated by
# hour of day, weekday and month. Rows are the pollutants listed in
# `filter_variables`; only records with summary == 1 are used.
# Within each panel the means are standardised per pollutant so that
# pollutants measured on different scales can share one colour scale.
grid.arrange(
  # p1 - hours
  df %>%
    # %in% instead of ==: `==` recycles the right-hand vector element-wise and
    # silently drops most matching rows when it holds more than one pollutant.
    filter(
      variable_combined %in% filter_variables$variable_combined,
      summary == 1
    ) %>%
    group_by(variable_combined, hour) %>%
    summarise(avg_value = mean(value)) %>%
    # summarise() leaves the data grouped by variable_combined, so scale()
    # standardises each pollutant separately. as.numeric() drops the
    # one-column matrix wrapper that scale() returns.
    mutate(avg_value = as.numeric(scale(avg_value))) %>%
    ggplot(aes(x = hour, y = variable_combined, fill = avg_value)) +
    geom_tile() +
    theme_minimal() +
    # Only the two ends of the legend are labelled, qualitatively.
    scale_fill_distiller(
      palette = "Reds",
      direction = 1,
      breaks = c(-1.5, 2),
      labels = c("low", "high")
    ) +
    labs(
      x = "Time (h)",
      y = "Air Pollutants",
      title = "Hourly, weekly and monthly patterns of chemical concentrations in the air in Kiel over all years",
      fill = "avg chemical\nconcentration"
    ) +
    theme(
      plot.title = element_text(size = 14, face = "bold", hjust = 1),
      axis.text = element_text(size = 10),
      axis.title = element_text(size = 10),
      legend.position = "right"
    ),
  # p2 - weekdays (legend hidden; the one on p1 serves all three panels)
  df %>%
    filter(
      variable_combined %in% filter_variables$variable_combined,
      summary == 1
    ) %>%
    group_by(variable_combined, weekday) %>%
    summarise(avg_value = mean(value)) %>%
    mutate(avg_value = as.numeric(scale(avg_value))) %>%
    ggplot(aes(x = weekday, y = variable_combined, fill = avg_value)) +
    geom_tile() +
    theme_minimal() +
    scale_fill_distiller(palette = "Reds", direction = 1) +
    labs(x = "Weekday", y = "Air Pollutants") +
    theme(
      axis.text = element_text(size = 10),
      axis.title = element_text(size = 10),
      legend.position = "none"
    ),
  # p3 - months (legend hidden as well)
  df %>%
    filter(
      variable_combined %in% filter_variables$variable_combined,
      summary == 1
    ) %>%
    group_by(variable_combined, month_name) %>%
    summarise(avg_value = mean(value)) %>%
    mutate(avg_value = as.numeric(scale(avg_value))) %>%
    ggplot(aes(x = month_name, y = variable_combined, fill = avg_value)) +
    geom_tile() +
    theme_minimal() +
    scale_fill_distiller(palette = "Reds", direction = 1) +
    labs(x = "Month", y = "Air Pollutants") +
    theme(
      axis.text = element_text(size = 10),
      axis.title = element_text(size = 10),
      legend.position = "none"
    ),
  nrow = 3
)
## `summarise()` has grouped output by 'variable_combined'. You can override using the `.groups` argument.
## `summarise()` has grouped output by 'variable_combined'. You can override using the `.groups` argument.
## `summarise()` has grouped output by 'variable_combined'. You can override using the `.groups` argument.

6 Comparison of emissions across Germany to Kiel and other cities YTD.

  • data from 2020 -> 2021 filtered to YTD
  • complete Germany
  • extracted some cities with regex
  • summary=1 -> hourly data
  • first plot: Kiel (and other cities) compared to Germany as a whole; Kiel has nearly zero impact on the overall emissions
  • second plot: Kiel in comparison to other cities
# Records from `df_germany` with summary == 1 for the selected pollutants,
# restricted to the 12 months before the time the script runs, joined to
# their measuring site and tagged with a `city` label derived from the
# site name.
df2 <- df_germany %>%
  filter(
    summary == 1,
    # %in% instead of ==: `==` recycles the right-hand vector element-wise and
    # silently drops most matching rows when it holds more than one pollutant.
    variable_combined %in% filter_variables$variable_combined,
    date >= now() - years(1)
  ) %>%
  inner_join(sites_germany, by = "site") %>%
  # The first matching pattern wins. Every pattern is anchored at the start of
  # the site name and ends on a word boundary. The backslash must be doubled
  # in an R string; a single "\b" is a backspace character, which is why the
  # "HH" abbreviation for Hamburg could never match before.
  mutate(city = case_when(
    # Keep a missing site name missing, as the nested ifelse() did.
    is.na(site_name) ~ NA_character_,
    str_detect(site_name, "^[Kk]iel\\b") ~ "Kiel",
    str_detect(site_name, "^[Bb]erlin\\b|^[Bb]\\b") ~ "Berlin",
    str_detect(site_name, "^[Hh]amburg\\b|^[Hh]{2}\\b") ~ "Hamburg",
    str_detect(site_name, "^[Mm][üu]e?nchen\\b") ~ "München",
    str_detect(site_name, "^[Bb]remen\\b") ~ "Bremen",
    str_detect(site_name, "^[Dd]resden\\b") ~ "Dresden",
    str_detect(site_name, "^[Hh]ann?over\\b") ~ "Hannover",
    str_detect(site_name, "^[Kk][öo]e?ln\\b") ~ "Köln",
    TRUE ~ "Not Defined"
  ))

# Two stacked panels built from `df2`, both showing the monthly sum of `value`
# per city label:
#   1. stacked area chart of all labels, including "Not Defined"
#   2. line chart of the named cities only
grid.arrange(
  df2 %>%
    group_by(month_date, city) %>%
    summarise(sum_value = sum(value)) %>%
    ggplot(aes(x = month_date, y = sum_value, fill = city)) +
    geom_area() +
    # The areas are mapped to `fill`, so the palette and the legend title have
    # to target the fill scale; a colour scale has no effect on this plot.
    scale_fill_brewer(palette = "Paired") +
    labs(
      x = "Date",
      y = bquote("Sum chemical concentration (" ~ frac(mg, m^3) ~ ")"),
      title = "All chemical concentrations combined in Germany in comparision to Kiel (and other Cities) YTD",
      fill = "City"
    ) +
    theme_bw() +
    theme(
      plot.title = element_text(size = 14, face = "bold", hjust = .1),
      axis.text = element_text(size = 8),
      axis.title = element_text(size = 8),
      legend.position = "right",
      legend.key.size = unit(5, "mm")
    ),
  df2 %>%
    filter(city != "Not Defined") %>%
    group_by(month_date, city) %>%
    summarise(sum_value = sum(value)) %>%
    ggplot(aes(x = month_date, y = sum_value, color = city)) +
    geom_line(size = 1) +
    # This panel hides its legend, so it uses the same palette as the panel
    # above. NOTE(review): colours should line up with the top legend as long
    # as "Not Defined" sorts last among the labels - confirm visually.
    scale_color_brewer(palette = "Paired") +
    labs(
      x = "Date",
      y = bquote("Sum chemical concentration (" ~ frac(mg, m^3) ~ ")")
    ) +
    theme_bw() +
    theme(
      axis.text = element_text(size = 8),
      axis.title = element_text(size = 8),
      legend.position = "none"
    ),
  nrow = 2
)
## `summarise()` has grouped output by 'month_date'. You can override using the `.groups` argument.
## `summarise()` has grouped output by 'month_date'. You can override using the `.groups` argument.